# -*- coding:utf-8 -*-
#python 2.7
#XiaoDeng
#http://tieba.baidu.com/p/2460150866
#标签操作


from bs4 import BeautifulSoup
import requests
from questiondb import qudb


#如果是网址，可以用这个办法来读取网页
# html_doc = "http://zbajxc.zbnews.net/jq/20292335.aspx"
# # req = urllib.request.Request(html_doc)
# # webpage = urllib.request.urlopen(req)
# headers = {
#     'User-Agent': 'mozilla/5.0 (linux; u; android 4.1.2; zh-cn; mi-one plus build/jzo54k) applewebkit/534.30 (khtml, like gecko) version/4.0 mobile safari/534.30 micromessenger/5.0.1.352'
# }
# html = requests.get(html_doc,headers=headers).text
# print html

def GetPage(url=None, timeout=30):
    """Download a questionnaire page and return its HTML as text.

    The request carries an Android / MicroMessenger mobile User-Agent
    string instead of the default ``requests`` one.

    Args:
        url: Address of the page to fetch. When omitted, the questionnaire
            URL this script was written against is used.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` waits indefinitely, so a stalled server would
            hang the whole script; with one, ``requests`` raises a
            timeout exception instead.

    Returns:
        The response body as decoded by ``requests`` (``Response.text``).
    """
    if url is None:
        url = "http://zbajxc.zbnews.net/jq/19708919.aspx?openid=ocnJg1CLvMI_F9FwAwjflBcXByvQ"

    headers = {
        'User-Agent': 'mozilla/5.0 (linux; u; android 4.1.2; zh-cn; mi-one plus build/jzo54k) applewebkit/534.30 (khtml, like gecko) version/4.0 mobile safari/534.30 micromessenger/5.0.1.352'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text

def getFile():
    """Return the entire contents of the local file ``t1.txt``.

    The file is looked up relative to the current working directory and
    is closed again before the function returns. Offline counterpart of
    fetching the page over HTTP.
    """
    with open('t1.txt') as saved_page:
        return saved_page.read()

html = GetPage()
soup = BeautifulSoup(html, 'html.parser')  # parsed document tree

i = 1  # not read anywhere in the visible part of this script

# Collect every question title block. Each entry of `questions` is
# [question number, first child node of the title div]; `question_ids`
# holds just the question numbers, in the same order.
questions = []
question_ids = []
for title_div in soup.find_all('div', class_='div_title_question'):
    # Assumes ids look like 'divTitle<N>' -- TODO confirm against the page.
    # Both lists are derived from the same id, so they must strip the same
    # prefix; stripping len('divquestion') here previously cut three
    # characters too many from the values stored in question_ids.
    number = title_div['id'][len('divTitle'):]
    questions.append([number, title_div.contents[0]])
    question_ids.append(number)

# Map the leading part of each <input> id (the text before the first '_')
# to the list of option texts taken from the <label> that directly follows
# that input in the parsed document.
# An earlier variant of this loop keyed on the label's own `for` attribute
# instead of the preceding input's id.
answers = {}
for label in soup.find_all('label'):
    # Only labels whose immediately preceding parsed node is an <input>
    # count as options. getattr() keeps the check safe when that node is
    # missing or has no tag name.
    prev = label.previous_element
    if getattr(prev, 'name', None) != 'input':
        continue

    input_id = prev.get('id')
    if input_id is None or not label.contents:
        # An input without an id cannot be assigned to a question, and an
        # empty label has no option text; skip both instead of aborting
        # the whole run with KeyError / IndexError.
        continue

    group = input_id.split('_')[0]
    answers.setdefault(group, []).append(label.contents[0])




# for x in answers.keys():
#     print x
#     for x1 in answers[x]:
#         print x1

#print i

j=0

qu_db=qudb()
for q in questions[5:]:
    #print q[0],q[1]
    last_id=qu_db.InsertNewQuestion(q[0],q[1])
    q_s='q'+q[0]
    #print q_s
    if q_s in answers and last_id!=0:
        #print answers[q_s]
        for choice in answers[q_s]:
            qu_db.InsertNewOption(last_id,choice)
            #print choice

    j=j+1
    #<div class="atcTit_more"><span class="SG_more"><a href="http://blog.sina.com.cn/" target="_blank">更多&gt;&gt;</a></span></div>

print "总共有",j,"个题目"
print "爬取了",qu_db.insert_count,"个题目"